
#### HOUSEKEEPING ####

rm(list = ls())

library(tidyverse)
library(data.table)
library(scales)
library(zoo)
library(lubridate)

source("00_paths.R")
source(paste0(code_dir, "texout.R"))

## Masked parameters (to protect data provider's anonymity)
max_tier <- NA # maximum service tier
min_tier <- NA # minimum service tier
start_announce <- NA # start of announcement period
start_treat <- NA # start of treatment period
pp_label <- NA # policy period label positions
an_label <- NA # policy period labels positions
tr_label <- NA # policy period labels positions
start_announce_mon <- NA # first month of announcement
end_announce_mon <- NA # last month of announcement
price  <- NA # prices 
allow  <- NA # allowances
qtu    <- NA # quantity of a top-up
ptu    <- NA # price of a top-up
ptv    <- NA # price of TV subscription
med_tier <- NA # median service tier
max_ovr <- NA # maximum overage charge

#### LOAD DATA ####

## DAILY
sd_df <- fread(file = paste0(dat_dir, "subdate_final.csv"))

sd_df <- sd_df %>%
  mutate(gb_video = gb_youtube + gb_netflix + gb_slingtv + gb_hulu + gb_othervideo) %>%
  mutate(gb_other = tot_gb - gb_video - gb_browsing) %>%
  select(mkt,customer_key,date,tot_gb,gb_video,gb_browsing,gb_other,gb_netflix,
         gb_youtube,gb_hulu,gb_slingtv,svc_tier,vid_flag)

#### FIGURE 3: UBP RESPONSE - PLAN CHANGES ####

## Helper function to create plan change data frames
create_change_df <- function(df, var, compare_op, init_filter, init_val) {
  df %>%
    select(mkt, customer_key, date, !!sym(var)) %>%
    group_by(customer_key) %>%
    arrange(date) %>%
    mutate(flg = ifelse(1:n() == 1, 0, as.numeric(compare_op(!!sym(var), lag(!!sym(var)))))) %>%
    mutate(cume = cummax(flg)) %>%
    mutate(cur = as.numeric(cume==1 & lag(cume)==0)) %>%
    mutate(init_filter_var = max((1:n() == 1) & (!!sym(var)==init_val))) %>%
    filter(init_filter_var == 0) %>%
    ungroup() %>%
    group_by(mkt) %>%
    mutate(n = length(unique(customer_key))) %>%
    ungroup() %>%
    group_by(mkt, date) %>%
    summarise(cume = mean(cume), cur = mean(cur), cur2 = mean(flg), n = first(n), .groups = 'drop') %>%
    group_by(mkt) %>%
    arrange(date) %>%
    ungroup()
}

## Generate all plan change data frames
tier_up_df <- create_change_df(sd_df, "svc_tier", `>`, TRUE, max_tier)
tier_down_df <- create_change_df(sd_df, "svc_tier", `<`, TRUE, min_tier)
vid_add_df <- create_change_df(sd_df, "vid_flag", `>`, TRUE, 1)
vid_drop_df <- create_change_df(sd_df, "vid_flag", `<`, TRUE, 0)

## Aggregate to monthly and create plot data frames
agg_to_monthly <- function(df) {
  df %>%
    mutate(x = floor_date(date, unit="month")) %>%
    group_by(mkt, x) %>%
    summarise(y = sum(cur), n = first(n), .groups = 'drop')
}

tier_up_df2   <- agg_to_monthly(tier_up_df)
tier_down_df2 <- agg_to_monthly(tier_down_df)
vid_add_df2   <- agg_to_monthly(vid_add_df)
vid_drop_df2  <- agg_to_monthly(vid_drop_df)

## Base graph theme
base_graph <- list(
  geom_vline(xintercept = c(as.Date(start_announce), as.Date(start_treat)), linewidth = 0.5, color = "gray"),
  geom_line(aes(y = y, x = date, group = mkt, color = mkt, linetype = mkt)),
  geom_ribbon(aes(y = y, x = date, group = mkt, fill = mkt,
                  ymin = y-1.96*sqrt(y*(1-y)/n), ymax = y+1.96*sqrt(y*(1-y)/n)),
              color = NA, alpha = 0.15),
  theme_bw(),
  scale_color_manual(name = "", values = c("gray20", "gray50"), labels = c("Treated", "Control")),
  scale_fill_manual(name = "", values = c("gray20", "gray50"), labels = c("Treated", "Control")),
  scale_linetype_manual(name = "", values = c("solid", "twodash"), labels = c("Treated", "Control")),
  guides(color = guide_legend(override.aes = list(linetype = c("solid", "twodash"))),
         fill = guide_legend(override.aes = list(linetype = c("solid", "twodash"))),
         linetype = guide_legend()),
  theme(axis.text=element_text(size=rel(0.75)), axis.text.x = element_blank(), axis.ticks.x = element_blank(),
        axis.title=element_text(size=rel(0.75)), legend.position = c(0.83,0.09),
        legend.text=element_text(size=rel(0.6)), legend.spacing.x = unit(0, "mm"),
        legend.spacing.y = unit(0, "mm"), legend.key.size = unit(3, "mm"), legend.title = element_blank(),
        text = element_text(family = "Times New Roman"), panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank())
)

## Annotation helper
annot_layer <- function(ylim_top) {
  list(
    annotate("text", x = as.Date(pp_label), y = ylim_top, label = "Pre-policy", hjust = 0, size = 2.5, family = "Times New Roman"),
    annotate("text", x = as.Date(an_label), y = ylim_top, label = "Announcement", hjust = 0, size = 2.5, family = "Times New Roman"),
    annotate("text", x = as.Date(tr_label), y = ylim_top, label = "Treatment", hjust = 0, size = 2.5, family = "Times New Roman")
  )
}

## Generate all figures
fig_specs <- list(
  list(data = tier_up_df2, ylabel = "Tier Upgrades", ylim = c(0,0.015), fname = "Figure3c"),
  list(data = tier_down_df2, ylabel = "Tier Downgrades", ylim = c(0,0.015), fname = "Figure3d"),
  list(data = vid_add_df2, ylabel = "TV Adds", ylim = c(0,0.01), fname = "Figure3a"),
  list(data = vid_drop_df2, ylabel = "TV Drops", ylim = c(0,0.01), fname = "Figure3b")
)

for (spec in fig_specs) {
  ggplot(spec$data %>% mutate(date=x)) +
    base_graph +
    labs(x = "Time", y = spec$ylabel) +
    coord_cartesian(ylim = spec$ylim) +
    annot_layer(spec$ylim[2] * 1.023)
  ggsave(filename = paste0(fig_dir, spec$fname, ".png"), height=3, width=3, device="png")
}

#### TABLE 2: UBP RESPONSE - USAGE ####

sm_df <- sd_df %>%
  mutate(month = month(date)) %>%
  group_by(customer_key, month) %>%
  arrange(date) %>%
  mutate(vid_flag = last(vid_flag)) %>%
  mutate(svc_tier = last(svc_tier)) %>%
  mutate(across(c(tot_gb, starts_with("gb_")), ~ sum(.x, na.rm=T))) %>%
  ungroup() %>%
  dplyr::select(mkt, customer_key, month, vid_flag, svc_tier, tot_gb, starts_with("gb_")) %>%
  distinct()

sm_df$Period <- ifelse(sm_df$month<start_announce_mon, 0, ifelse(sm_df$month>end_announce_mon, 2, 1))

get_stats <- function(x) {
  c(
    mean(x),
    sd(x),
    quantile(x, probs = c(0.01,0.25,0.5,0.75,0.99)),
    length(x)
  )
}

mean_did <- function(var, df=sm_df) {
  c(
    get_stats(sm_df[sm_df$mkt=="Treated" & sm_df$Period==0,][[var]])[1],
    get_stats(sm_df[sm_df$mkt=="Treated" & sm_df$Period==2,][[var]])[1],
    get_stats(sm_df[sm_df$mkt!="Treated" & sm_df$Period==0,][[var]])[1],
    get_stats(sm_df[sm_df$mkt!="Treated" & sm_df$Period==2,][[var]])[1]
  )
}

did_df <- data.frame(
  rbind(
    mean_did("gb_video", sm_df),
    mean_did("gb_netflix", sm_df),
    mean_did("gb_youtube", sm_df),
    mean_did("gb_hulu", sm_df),
    mean_did("gb_slingtv", sm_df),
    mean_did("gb_browsing", sm_df),
    mean_did("gb_other", sm_df),
    mean_did("tot_gb", sm_df)
  )
) %>%
  mutate(DID = (V2-V1) - (V4-V3))

types <- c("Video", "\\hspace{.3cm}Netflix", "\\hspace{.3cm}YouTube",
           "\\hspace{.3cm}Hulu", "\\hspace{.3cm}Sling TV", "Browsing", "Other",
           "Total")
texout(x = did_df, r = types, n=2)

#### TABLE 1: USAGE AND PLAN CHOICE DURING PRE-POLICY PERIOD ####

sm_df$svc_tier_grp <- ifelse(sm_df$svc_tier<med_tier, "Low",
                             ifelse(sm_df$svc_tier>med_tier, "High", "Median"))
sm_df$svc_tier_low <- sm_df$svc_tier_grp=="Low"
sm_df$svc_tier_median <- sm_df$svc_tier_grp=="Median"
sm_df$svc_tier_high <- sm_df$svc_tier_grp=="High"

## Panel 1: GB Total Monthly Usage
usage_df <- data.frame(
  cbind(
    get_stats(sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$tot_gb)[1:7],
    get_stats(sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$tot_gb)[1:7],
    get_stats(sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$tot_gb)[1:7],
    get_stats(sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$tot_gb)[1:7]
  )
)

stats <- c("\\hspace{.3cm}Mean", "\\hspace{.3cm}Standard Deviation",
           "\\hspace{.3cm}1st Percentile", "\\hspace{.3cm}25th Percentile",
           "\\hspace{.3cm}Median", "\\hspace{.3cm}75th Percentile",
           "\\hspace{.3cm}99th Percentile")

texout(x = usage_df, r = stats, n=2)

## Panel 2: Subscription Choices
grp_means <- function(var, df=sm_df) {
  c(
    get_stats(sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==0,][[var]])[1],
    get_stats(sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==0,][[var]])[1],
    get_stats(sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1,][[var]])[1],
    get_stats(sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1,][[var]])[1]
  )
}

shares_df <- data.frame(
  rbind(
    grp_means("svc_tier_low", sm_df %>% filter(Period==0)),
    grp_means("svc_tier_median", sm_df %>% filter(Period==0)),
    grp_means("svc_tier_high", sm_df %>% filter(Period==0))
  )
)
tiers <- c("\\hspace{.3cm}Low Speed Internet", "\\hspace{.3cm}Median Speed Internet", "\\hspace{.3cm}High Speed Internet")
texout(x = shares_df, r = tiers, n=2)

# monthly bill
ovrcalc <- function(x,vid_flag,p=price,a=allow,qtu=qtu,ptu=ptu,ptv=ptv,max_ovr=max_ovr) {
  # returns total price of a usage level x on each tier
  excess <- ifelse(x<=a, 0, x-a)
  ntu    <- floor(excess/qtu)
  ovr    <- ifelse(ptu*ntu < max_ovr, ptu*ntu, max_ovr)
  return(p + ovr + vid_flag*ptv)
}

bill_df <- data.frame(
  cbind(
    mean(ovrcalc(x=sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$tot_gb,
                 vid_flag=0,
                 p=price[sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$svc_tier],
                 a=allow[sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$svc_tier],
                 qtu=qtu,
                 ptu=0,
                 ptv=ptv)),
    mean(ovrcalc(x=sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$tot_gb,
                 vid_flag=0,
                 p=price[sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$svc_tier],
                 a=allow[sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$svc_tier],
                 qtu=qtu,
                 ptu=0,
                 ptv=ptv)),
    mean(ovrcalc(x=sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$tot_gb,
                 vid_flag=sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$vid_flag,
                 p=price[sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$svc_tier],
                 a=allow[sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$svc_tier],
                 qtu=qtu,
                 ptu=0,
                 ptv=ptv)),
    mean(ovrcalc(x=sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$tot_gb,
                 vid_flag=sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$vid_flag,
                 p=price[sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$svc_tier],
                 a=allow[sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$svc_tier],
                 qtu=qtu,
                 ptu=0,
                 ptv=ptv))
  )
)

texout(x = bill_df, r = "Total Bill", n=2)

## Panel 3: Mean Usage by Category
categories_df <- data.frame(
  rbind(
    grp_means("gb_video", sm_df %>% filter(Period==0)),
    grp_means("gb_browsing", sm_df %>% filter(Period==0)),
    grp_means("gb_other", sm_df %>% filter(Period==0))
  )
)
cats <- c("\\hspace{.3cm}Video", "\\hspace{.3cm}Browsing", "\\hspace{.3cm}Other")
texout(x = categories_df, r = cats, n=2)

## Panel 4: Mean Usage of OTT Services
video_df <- data.frame(
  rbind(
    grp_means("gb_netflix", sm_df %>% filter(Period==0)),
    grp_means("gb_youtube", sm_df %>% filter(Period==0)),
    grp_means("gb_hulu", sm_df %>% filter(Period==0)),
    grp_means("gb_slingtv", sm_df %>% filter(Period==0))
  )
)
vids <- c("\\hspace{.3cm}Netflix", "\\hspace{.3cm}YouTube", "\\hspace{.3cm}Hulu", "\\hspace{.3cm}Sling TV")
texout(x = video_df, r = vids, n=2)

n_df <- data.frame(
  cbind(
    get_stats(unique(sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$customer_key))[8],
    get_stats(unique(sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==0 & sm_df$Period==0,]$customer_key))[8],
    get_stats(unique(sm_df[sm_df$mkt=="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$customer_key))[8],
    get_stats(unique(sm_df[sm_df$mkt!="Treated" & sm_df$vid_flag==1 & sm_df$Period==0,]$customer_key))[8]
  )
)
texout(x = n_df, r = "Household Count", n=0)
